Demographics Responses

This section should help us understand who is interested and which parts they are interested in.

from config import survey, default_color
import matplotlib.pyplot as plt
import pandas as pd
import altair as alt

Basic Demographic Questions

First, let's look at the raw data for all the demographic questions.

Employer

# Horizontal bar chart of the `employed_by` column: one bar per distinct
# value, with bar length equal to the number of survey rows having it.
alt.Chart(survey, title="Employer?").mark_bar().encode(
    x=alt.X("count()", title="Number of people"),
    # sort="-x" orders the bars by descending x value (the count).
    y=alt.Y("employed_by:N", title="", sort="-x"),
    # NOTE(review): the count is typed nominal (:N) here; quantitative (:Q)
    # is the usual type for an aggregate count -- confirm this is intended.
    tooltip=alt.Tooltip("count():N"),
)

Rank

# Horizontal bar chart of the `job_rank` column: one bar per distinct
# value, with bar length equal to the number of survey rows having it.
alt.Chart(survey, title="Rank").mark_bar().encode(
    x=alt.X("count()", title="Number of people"),
    # sort="-x" orders the bars by descending x value (the count).
    y=alt.Y("job_rank:N", title="", sort="-x"),
    # NOTE(review): the count is typed nominal (:N) here; quantitative (:Q)
    # is the usual type for an aggregate count -- confirm this is intended.
    tooltip=alt.Tooltip("count():N"),
)

Travel

# Horizontal bar chart of the `travel` column: one bar per distinct
# value, with bar length equal to the number of survey rows having it.
alt.Chart(survey, title="Travel Type").mark_bar().encode(
    x=alt.X("count()", title="Number of people"),
    # sort="-x" orders the bars by descending x value (the count).
    y=alt.Y("travel:N", title="", sort="-x"),
    # NOTE(review): the count is typed nominal (:N) here; quantitative (:Q)
    # is the usual type for an aggregate count -- confirm this is intended.
    tooltip=alt.Tooltip("count():N"),
)

Management

# Horizontal bar chart of the `management` column: one bar per distinct
# value, with bar length equal to the number of survey rows having it.
alt.Chart(survey, title="Snowmass Management").mark_bar().encode(
    x=alt.X("count()", title="Number of people"),
    # sort="-x" orders the bars by descending x value (the count).
    y=alt.Y("management:N", title="", sort="-x"),
    # NOTE(review): the count is typed nominal (:N) here; quantitative (:Q)
    # is the usual type for an aggregate count -- confirm this is intended.
    tooltip=alt.Tooltip("count():N"),
)

Primary Frontier Interest

Although the survey question asks for the primary frontier, respondents were able to select multiple frontiers, and they did!

# Bar chart of how many frontiers each respondent selected.
# A calculate transform derives `f_len` as the length of `frontier_all`
# for every row, and the bars count the rows per distinct `f_len` value.
# Presumably `frontier_all` holds a list of frontiers per respondent --
# TODO confirm against the code that builds `survey`.
# NOTE(review): a notebook only renders a bare expression when it is the
# last statement of its cell; confirm this chart does not share a cell with
# the chart that follows it, otherwise it is never displayed.
(
    alt.Chart(survey, title="Number of Primary Frontiers")
    .transform_calculate(f_len="length(datum.frontier_all)")
    .mark_bar()
    .encode(
        x=alt.X("count()", title="Number of people"),
        y=alt.Y("f_len:N"),
        tooltip=alt.Tooltip("count():N"),
    )
)
# Bar chart of how often each individual frontier was selected.
# The flatten transform expands `frontier_all` so that every element of a
# respondent's selection becomes its own row before counting.
# Presumably `frontier_all` holds a list of frontiers per respondent --
# TODO confirm against the code that builds `survey`.
(
    alt.Chart(survey, title="Frontiers Selected")
    .transform_flatten(flatten=["frontier_all"])
    .mark_bar()
    .encode(
        x=alt.X("count()", title="Number of people"),
        # sort="-x" orders the bars by descending x value (the count).
        y=alt.Y("frontier_all:N", sort="-x"),
        tooltip=alt.Tooltip("count():N"),
    )
)

Cross Frontier Interest

For the purposes of cross-frontier discussions, it is useful to understand the correlations between the frontiers that respondents selected as primary.

# Build `cor_data`: a long-form table with one row per (frontier1, frontier2)
# pair holding their correlation, plus a two-decimal text label for plotting.

# Step 1: keep only the ten per-frontier columns of the survey table.
cor_data = survey[[
    'frontier_EF',
    'frontier_CF',
    'frontier_NF',
    'frontier_TF',
    'frontier_CompF',
    'frontier_RF',
    'frontier_IF',
    'frontier_AF',
    'frontier_CommF',
    'frontier_UF',
]]

# Step 2: drop the "frontier_" prefix so the axis labels stay short.
cor_data = cor_data.rename(columns={
    'frontier_EF': 'EF',
    'frontier_CF': 'CF',
    'frontier_NF': 'NF',
    'frontier_TF': 'TF',
    'frontier_CompF': 'CompF',
    'frontier_RF': 'RF',
    'frontier_IF': 'IF',
    'frontier_AF': 'AF',
    'frontier_CommF': 'CommF',
    'frontier_UF': 'UF',
})

# Step 3: pairwise column correlation matrix, reshaped from wide to long.
# After stack() + reset_index() the two index levels and the value column
# carry default names, which are replaced with descriptive ones.
cor_data = cor_data.corr().stack().reset_index()
cor_data = cor_data.rename(columns={
    'level_0': 'frontier1',
    'level_1': 'frontier2',
    0: 'correlation',
})

# Step 4: pre-format the correlation as text for the heatmap labels.
cor_data['correlation_label'] = cor_data['correlation'].map('{:.2f}'.format)

# Shared base for both layers: an 800x800 chart with `frontier1` on the
# x axis and `frontier2` on the y axis, both ordinal and without titles.
base = alt.Chart(cor_data, width=800, height=800).encode(
    y=alt.Y('frontier2:O', title=""),
    x=alt.X('frontier1:O', title=""),
)

# Heatmap layer: one rectangle per frontier pair, coloured by correlation.
cor_plot = base.mark_rect().encode(color=alt.Color('correlation:Q'))

# Label layer: the pre-formatted correlation value written in each cell.
# The label is white where the correlation exceeds 0.5 and black otherwise,
# to keep it readable against the cell colour.
text = base.mark_text().encode(
    text=alt.Text('correlation_label'),
    color=alt.condition(
        alt.datum.correlation > 0.5,
        if_true=alt.value('white'),
        if_false=alt.value('black'),
    ),
)

# Overlay the labels on the heatmap; equivalent to `cor_plot + text`.
alt.layer(cor_plot, text)